This notebook contains code to replicate analyses reported in CHI submission.

In VISUALIZATION VIBES project Study 2, participants completed an attitutde eliciation survey, asking questions about their attitude toward (5) stimulus images (data visualizations). Each participant was randomly assigned to one of 6 stimulus blocks, each containing 1 image from each of (4) categories (ranging from most abstract to most figural). Each participant started by responding to questions for a single ‘common block’ stimulus (that is thus super-powered as it was seen by all participants). Two participant recruitment pools were used: Prolific, with a smaller set of participants recruited from Tumblr (to replicate results of interview Study 1 with participants sourced from Tumblr).

1 SETUP

We start by importing data files previously wrangled in 0_VIBES_S2_wrangling.Rmd.

############## IMPORT REFERENCE FILES
ref_stimuli <- readRDS("data/input/REFERENCE/ref_stimuli.rds")
ref_surveys <- readRDS("data/input/REFERENCE/ref_surveys.rds")
############## IMPORT DATA FILES
# df_data <- readRDS("data/output/df_data.rds") #1 row per participant — WIDE
df_participants <- readRDS("data/output/df_participants.rds") #1 row per participant — demographic
# df_questions <- readRDS("data/output/df_questions.rds") #1 row per question — LONG
# df_sd_questions_wide <- readRDS("data/output/df_sd_questions_wide.rds") — only sd questions WIDE
# df_sd_questions_long <- readRDS("data/output/df_sd_questions_long.rds") - only sd questions LONG
df_tools <- readRDS("data/output/df_tools.rds") #multiselect format for tools Question
df_actions <- readRDS("data/output/df_actions.rds") # multiselect format for action Question
# df_graphs_full <- readRDS("data/output/df_graphs_full.rds") #includes free response data
df_graphs <- readRDS("data/output/df_graphs.rds") #only categorical and numeric questions
############## SETUP Graph Labels
ref_stim_id <- levels(ref_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
               "MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
               "CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
          "altruistic","does NOT share","untrustworthy",
          "NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
           "selfish", "DOES share", "trustworthy",
           "very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
# ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
ref_blocks <- c(1,2,3,4,5,6)
rm(left,right)
############## SETUP Colour Palettes
#https://www.r-bloggers.com/2022/06/custom-colour-palettes-for-ggplot2/

## list of color pallettes
my_colors = list(
  politics = c("#184aff","#5238bf", "#4f4a52" ,"#84649c", "#ff0000"),
  blackred = c("black","red"),
  greens = c("#ADC69D","#81A06D","#567E39","#2D5D16","#193E0A"),
  smallgreens = c("#ADC69D","#567E39","#193E0A"),
  olives = c("#CDCEA1","#B8B979","#A0A054","#78783F","#50502A","#35351C"),
  lightblues = c("#96C5D2","#61A2B2","#3C8093","#2C6378","#1F4A64"),
  darkblues = c("#7AAFE1","#3787D2","#2A73B7","#225E96","#1A4974","#133453"),
  reds = c("#D9B8BD","#CE98A2","#B17380","#954E5F","#78263E","#62151F")
)



## function for using palettes
my_palettes = function(name, n, all_palettes = my_colors, type = c("discrete","continuous"), direction = c("1","-1")) {
  palette = all_palettes[[name]]
  if (missing(n)) {
    n = length(palette)
  }
  type = match.arg(type)
  out = switch(type,
               continuous = grDevices::colorRampPalette(palette)(n),
               discrete = palette[1:n]
  )
  out = switch(direction,
               "1" = out,
               "-1" = palette[n:1])
  structure(out, name = name, class = "palette")
}
############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (df, left, right, x, y, color) {

  # g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
  g <- ggplot(df, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
    
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}


############## RETURNS SINGLE SD 
## LOOP STYLE
single_sd <- function (df, left, right, x) {

  g <- ggplot(df, aes(y = {{x}}, x = ""))+
  geom_boxplot(width = 0.5) +
  geom_jitter(width = 0.1, alpha=0.5) +
  scale_y_continuous(limits=c(-1,101)) +
  labs(x="", y="") +
  coord_flip() +
  guides(
    y = guide_axis_manual(labels = left),
    y.sec = guide_axis_manual(labels = right)
  ) + theme_minimal()

  return(g)
}



######## RETURNS SINGLE SD
##  APPLY STYLE
# plot_sd = function (data, column, type, split, boxplot) {
# 
#   ggplot(df, aes(y = .data[[column]], x="")) +
#     {if(boxplot) geom_boxplot(width = 0.5) } +
#     geom_jitter(width = 0.1, alpha=0.3, {if(split) aes(color=Distribution)}) +
#     {if(split) facet_grid(Distribution ~ .)} +
#     scale_y_continuous(limits=c(-1,101)) +
#     labs(x="", y="") +
#     coord_flip()  +
#     {if(type == "S")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[column,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
#       )} +
#     {if(type == "Q")
#       guides(
#         y = guide_axis_manual(labels = ref_labels[q,"left"]),
#         y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
#       )} +
#   theme_minimal()  +
#      labs (
#        caption = column
#      )
# }

######## RETURNS SINGLE SD
##  APPLY STYLE
plot_sd = function (data, column, type, facet, facet_by, boxplot) {

  ggplot(df, aes(y = .data[[column]], x="")) +
    {if(boxplot) geom_boxplot(width = 0.5) } +
    geom_jitter(width = 0.1, alpha=0.3, {if(facet) aes(color=.data[[facet_by]])}) +
    {if(facet) facet_grid(.data[[facet_by]] ~ .)} +
    scale_y_continuous(limits=c(-1,101)) +
    labs(x="", y="") +
    coord_flip()  +
    {if(type == "S")
      guides(
        y = guide_axis_manual(labels = ref_labels[column,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
      )} +
    {if(type == "Q")
      guides(
        y = guide_axis_manual(labels = ref_labels[q,"left"]),
        y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
      )} +
  theme_minimal()  +
     labs (
       caption = column
     ) + easy_remove_legend()
}

2 DESCRIBE SAMPLE

2.1 Sample Demographics

df <- df_participants

## FOR DESCRIPTIVES PARAGRAPH
# #PROLIFIC
df.p <- df %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)

# #TUMBLR
df.t <- df %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)

For study 2, a total of 318 participants were recruited from US-located English speaking users of TUMBLR (n = 78) and PROLIFIC (n = 240).

240 individuals from PROLIFIC participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).

78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other). Note that a higher proportion of participants recruited from TUMBLR report identities other than cis-gender Female and cis-gender Male.

2.2 Study Response Time

df <- df_participants

## for descriptives paragraph
p.desc.duration <- psych::describe(df %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))

PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.

TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.

rm(df, df.p, df.t, p.desc.duration, t.desc.duration, desc.gender.p, desc.gender.t, p_participants, t_participants)

3 BLOCK 2 ANALYSIS

In the interest of space, for the manuscript we report analysis of one stimulus block (stimulus block == 2; pictured in the teaser image), rather than analysis of each of the six blocks. Each block contains for images, one of each “abstraction category” ranging from [1] most abstract (at left) to [4] most figural (at right). Within each block we selected visualizations such that there was diversity in: (1) chart type (2) source (i.e. where the graphic actually came from), and (3) what we as authors perceived to be aesthetic “vibe”, (i.e) a diversity of what we subjectively judged to be sophisticated and unsophisticated, aesthetically pleasing, not pleasing, etc. The goal in structuring stimulus blocks was to offer each participant a set of visualizations that varied in multiple ways.

BLOCK 2 STIMULI
B2-1 B2-2 B2-3 B2-4
“Bluesy Economist Stacked Area” “Significant Excel Error Bars” “Slick Desert Heat Map” “Holmesian Hospital Beds”

3.1 SAMPLE

#block 2 participant-level data 
df_b2_p <- df_participants %>% filter(Assigned.Block ==2)

#block 2 stimulus-level data 
df_b2 <- df_graphs %>% 
  filter(str_detect(STIMULUS,"B2"))
# %>%
#   mutate(MAKER_ID = fct_rev(MAKER_ID))

(n = 52 ) survey respondents answered questions about the (4) stimuli in block 2, yielding (o = 208) observations.

3.2 MAKER QUESTIONS

3.2.1 MAKER ID

Participants were asked:

Who do you think is most likely responsible for having this image created?
The response is stored as MAKER_ID

3.2.1.1 Maker ID by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER IDENTIFICATION BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_ID)) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER ID by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="reds", direction = "1"), name = "",  guide = guide_legend(reverse = FALSE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  theme_minimal()
##############################



## B
## MAKER IDENTIFICATION BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
B <-
  ggbarstats( data = dx, 
            x = MAKER_ID, y = STIMULUS,
            legend.title = "MAKER ID",
            results.subtitle = FALSE) + 
    scale_fill_manual(values = my_palettes(name="reds", direction = "1"))+
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER ID BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER ID REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_ID %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_ID, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_ID
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color = "white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  labs(title = "MAKER ID by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER ID",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in maker identifications is more likely a function of participant or stimulus , we visualize the MAKER_ID data via an alluvial diagram. Here we see that there are no stable identifications (ie. paths through each stimulus with the same category) indicating that no participants had a strong preference for a particular identification category. Rather, participants seem to be identifying the maker category as a function of some features of the stimuli, rather than a trait/state inference about makers of all stimuli in general.

3.2.1.2 Maker ID Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as MAKER_CONF .

## DESCRIBE MAKER CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) 

desc.maker_conf <- psych::describe(df$MAKER_CONF)

## H 
## Distribution of MAKER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = MAKER_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker ID Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.maker_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker identifications] ranging from1 to 100 , with a mean of 64.7. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).

3.2.1.3 Maker ID Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(MAKER_CONF), 
         max = max(MAKER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(MAKER_CONF), 
         max = max(MAKER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(STIMULUS,MAKER_CONF)) + 
  geom_boxplot(width = 0.3)+ 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) + 
  labs(
    title = "Confidence in Maker ID",
    y = "MAKER ID CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.maker_conf$min, " to ", desc.maker_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################


# MAKER_CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER ID CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker identification differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( 31 of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale, with the exception of two subjects near 50%. The mean confidence score for each stimulus (blue) is greater than 50%, and no participants indicated consistently low confidence across all stimuli, indicating that in general, participants were able to respond thoughtfully to the maker identification question.

3.2.1.4 Maker ID & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER_CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF, color = MAKER_ID)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="reds", direction = "1")) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker ID Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_ID), y = AGE_CONF, color = (MAKER_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="reds", direction = "1"), guide = guide_legend(reverse = FALSE)) +   
  labs (x = "MAKER ID") + 
  easy_add_legend_title("MAKER ID ")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER ID & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.maker_conf, p_lowvar)

Inspecting Maker ID Confidence by Maker ID category, there does not appear to be any systematic relationship between the category of Maker a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(ID) = ORGANIZATION , for example, are made with greater or less confidence than identifications of MAKER(ID) = INDIVIDUAL. Statistical tests of independence (between confidence and maker id) are ill-advised because the number of observations of each maker ID vary greatly between stimuli.

Taken together, this analysis of correspondence between MAKER_ID and MAKER_CONF suggests that both the inference identifying the kind of maker as well as the participant’s confidence in that inference are likely conditioned on some features of the stimulus rather than more stable state/trait(s) of the participant.

3.2.2 MAKER AGE

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the image
What generation are they most likely from?
The response was saved as MAKER_AGE

3.2.2.1 Maker AGE by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER AGE BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_AGE)) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER AGE by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +   
    #scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()
##############################


## B
## MAKER AGE BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_AGE = fct_rev(MAKER_AGE))
B <- ggbarstats( data = dx, 
            x = MAKER_AGE, y = STIMULUS,
            legend.title = "MAKER AGE",
            results.subtitle = FALSE)  +
    scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +   
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER AGE BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER AGE REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_AGE %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_AGE, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_AGE
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="lightblues", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  
  labs(title = "MAKER AGE by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER AGE",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in maker age attributions are more likely a function of participant or stimulus , we visualize the MAKER_AGE data via an alluvial diagram. Here we see that (unlike MAKER_ID) there are a number of stable identifications (ie. paths through each stimulus with the same category) indicating that some participants consistently attributed all stimuli to a particular age category. This occured most often with the ‘Millenial’ and ‘GenX’ age categories. However, this pattern may be confounded by both the true base rate of maker age (inferred based on working age of likely makers).

3.2.2.2 Maker AGE Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as AGE_CONF .

## DESCRIBE MAKER AGE CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) 

desc.age_conf <- psych::describe(df$AGE_CONF)

## H 
## Distribution of AGE_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = AGE_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker AGE Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.age_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker age attribution] ranging from8 to 100 , with a mean of 62.1. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).

3.2.2.3 Maker AGE Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(AGE_CONF), 
         max = max(AGE_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_graphs %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(AGE_CONF), 
         max = max(AGE_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = AGE_CONF)) + 
  scale_y_continuous(limits=c(0,100))+
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1"))+
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in Maker AGE",
    y = "MAKER AGE CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.age_conf$min, " to ", desc.age_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_AGE CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = AGE_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER AGE CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker age differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale, with the exception of (2) participants who have low variance scores clustered around 50% and (2) participants clustered around 25%. The mean confidence score for each stimulus (blue) is greater than 50%, and no participants indicated consistently low confidence across all stimuli, indicating that in general, participants were able to respond thoughtfully to the maker age question.

3.2.2.4 Maker AGE & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER AGE CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = AGE_CONF, color = MAKER_AGE)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="lightblues", direction = "1")) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker Age Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_AGE), y = AGE_CONF, color = (MAKER_AGE))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="lightblues", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  labs (x = "MAKER AGE") + 
  easy_add_legend_title("MAKER AGE")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER AGE & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

Inspecting Maker AGE Confidence by Maker AGE category, there does not appear to be any systematic relationship between the category of Maker AGE a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(AGE) = MILLENIAL , for example, are made with greater or less confidence than identifications of MAKER(AGE) = BOOMER. Statistical tests of independence (between confidence and maker age) are ill-advised because the number of observations of each maker age vary greatly between stimuli.

Taken together, this analysis of correspondence between MAKER AGE and MAKER AGE CONFIDENCE suggests that both the inference identifying the age generation of maker (and the participant’s confidence in that inference) are conditioned on some features of the stimulus rather than more stable state/trait(s) of the participant.

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.age_conf, p_lowvar)

3.2.3 MAKER GENDER

Participants were asked:
Take a moment to imagine the person(s) responsible for creating the image
What gender do they most likely identify with?
Responses were stored as MAKER_GENDER.

3.2.3.1 Maker GENDER by Stimulus

#FILTER DATASET
df <- df_b2

## A
## MAKER GENDER BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART 
A <- ggplot( df, aes( x = (STIMULUS), fill = fct_rev(MAKER_GENDER))) +
  geom_bar(position = "stack") +
  facet_grid( Distribution ~ ., scales = "free", space = "free") + 
  labs( title = "MAKER GENDER by Stimulus and SAMPLE",
        subtitle = "", x = "") +
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +   
    #scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
  # coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()
##############################


## B
## MAKER GENDER BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_GENDER = (MAKER_GENDER))
B <- ggbarstats( data = dx, 
            x = MAKER_GENDER, y = STIMULUS,
            legend.title = "MAKER GENDER",
            results.subtitle = FALSE)  +
    scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +   
    # scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
    labs( title = "MAKER GENDER BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

## S
## MAKER GENDER REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- df_b2$MAKER_GENDER %>% levels   #list order of factor levels
ds <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(STIMULUS, MAKER_GENDER, PID, Distribution) %>% 
#pivot wider 
  pivot_wider(
    names_from = STIMULUS, 
    values_from = MAKER_GENDER
  ) %>%
#prep for geom_sankey
  make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>% 
  mutate(
    match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
    node = factor(node, levels = order),
    next_node = factor(next_node, levels = order)
  ) 
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x, 
               next_x = next_x, 
               node = node, 
               next_node = next_node,
               fill = node, 
               ))+
  geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
  geom_alluvial_text(aes( x = as.numeric(x),  label = after_stat(freq)),
          size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
  
  labs(title = "MAKER GENDER by STIMULUS", 
       x = "STIMULUS", y = "(count)", fill = "MAKER GENDER",
       caption = "") + 
  theme_minimal()
 ##############################

# DISPLAY GRAPHS
A

B 

S

# CLEANUP
rm(df, ds, dx, order)

To explore whether variance in MAKER_GENDER inferences are more likely a function of participant or stimulus , we visualize the MAKER_GENDER data via an alluvial diagram. Here we see that (unlike MAKER_ID) there are a number of stable identifications (ie. paths through each stimulus with the same category) indicating that some participants consistently attributed all stimuli to a particular gender category. It is difficult to determine whether this pattern of results indicates individuals are making stable assumptions about the gender of makers independetly of stimuli (i.e. a state/trait of the participant) or alternatively, if participants are responding to the features of the stimulus and the consistency of responses across stimuli is an artifact of the limited number of gender categories. To further explore inferences about maker gender we will attend to stimuli that show large variance in distribution across gender category levels (e.g. B2-1 (vs) B2-3), especially stimuli that have a larger proportion of ‘FEMALE’ identifications.

3.2.3.2 Maker GENDER by PARTICIPANT GENDER

df <- df_b2

## MAKER GENDER BY STIMULUS AND PARTICIPANT GENDER
## FACETED BAR CHART 
ggplot( df, aes( x = (STIMULUS), fill = fct_rev(MAKER_GENDER))) +
  geom_bar(position = "stack") +
  facet_grid( . ~ D_gender_collapsed, scales = "free", space = "free") + 
  scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1"),name = "MAKER GENDER") + 
  labs( title = "MAKER GENDER by STIMULUS and PARTICIPANT GENDER",
        subtitle = "Facet indicates participant gender", x = "STIMULUS") +
  theme_minimal()

##############################

Note that the same pattern of attribution (stimuli B2-1,2 and 4 primarily male, with B2-3 a higher proportion of females), is consistent across the participant’s own gender identities. It is NOT the case that female participants attribute most stimuli to women, while male participants attribute most stimuli to men. Rather, these results indicate that participants (regardless of their own gender identity) are identifying a maker’s gender based on some features of the stimulus in combination with state/trait-based assumptions about maker gender identities in general. More data are needed to address this question more thoroughly.

3.2.3.3 Maker GENDER Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as GENDER_CONF .

## DESCRIBE MAKER GENDER CONFIDENCE
df <- df_b2 %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) 

desc.gender_conf <- psych::describe(df$GENDER_CONF)

## H 
## Distribution of GENDER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = GENDER_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Maker GENDER Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.gender_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker gender attribution] ranging from0 to 100 , with a mean of 54. According to (one-sample) student’s t-tests, the mean of the distribution is NOT significantly different (at the 0.01 alpha level) from both reference values of 50% (indicating partial confidence). Relative to MAKER ID and MAKER AGE, participants expressed less confidence in their attribution of MAKER GENDER

3.2.3.4 Maker GENDER Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(GENDER_CONF), 
         max = max(GENDER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_b2 %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(GENDER_CONF), 
         max = max(GENDER_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER GENDER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = GENDER_CONF)) + 
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in Maker GENDER",
    y = "MAKER GENDER CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.gender_conf$min, " to ", desc.gender_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_GENDER CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER GENDER CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in maker GENDER differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are not clustered near the high end of the confidence scale as with confidence in maker ID and maker AGE … rather, low variance is maker GENDER confidence seems to be evenly distributed across the confidence scale. The mean confidence score for each stimulus (blue) are near 50%.

Unlike MAKER ID and MAKER AGE, confidence in MAKER GENDER is generally more idiosyncratic, and potentially reflective of an individual participant’s state/trait inference about maker GENDER in general, rather than responding to particular features of each stimuli. To verify, we will look for stimuli that have strong patterns of difference in relative proportion of each level of the MAKER GENDER variable.

3.2.3.5 Maker GENDER & Confidence (by Stimulus)

df <- df_b2

## MC
## MAKER GENDER CONFIDENCE by IDENTIFICATION  
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF, color = MAKER_GENDER)) + 
  geom_boxplot(position=position_dodge(0.8), width = 0.5)+
  geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
  scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  # scale_color_viridis(discrete=TRUE, option="viridis") +
  labs (title = "", 
        x = "", y = "Maker Gender Confidence") + 
  theme_minimal() 
###################################################


# CONFIDENCE by ID
C <- ggplot(df, aes( x = (MAKER_GENDER), y = GENDER_CONF, color = (MAKER_GENDER))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"), guide = guide_legend(reverse = TRUE)) +   
  labs (x = "MAKER GENDER") + 
  easy_add_legend_title("MAKER GENDER")+
  theme_minimal() 


# DISPLAY GRAPHS
 (p <- ( B/MC / C)  + plot_annotation(
   title = "MAKER GENDER & CONFIDENCE" ,
   subtitle = '',
   caption = ''))

Inspecting MAKER GENDER Confidence by MAKER GENDER category, there does not appear to be any systematic relationship between the category of Maker GENDER a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of MAKER(GENDER) = MALE , for example, are made with greater or less confidence than identifications of MAKER(GENDER) = FEMALE. Statistical tests of independence (between confidence and maker age) are ill-advised because the number of observations of each maker age vary greatly between stimuli.

Taken together, this lack of variance in GENDER_CONF within-subjects and between stimuli, along with the limited number of categorical choices for the MAKER GENDER variable indicates that the inference identifying the gender generation of the maker (and the participant’s confidence in that inference) may infact be conditioned on some relatively stable state/trait(s) of the individual participant, rather than features of the stimulus. To verify this intuition we will look for stimuli with highly variant distributions of maker gender categories. (e.g. largely female or other,rather than male)

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.gender_conf, p_lowvar)

3.3 TOOL QUESTIONS

3.3.1 TOOL CHOICE

Participants were asked: What tools do you think were most likely used to create this image?
The response was saved as variable TOOL_ID (multi-select)

3.3.1.1 Tool ID by Stimulus

#FILTER DATASET
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))

## A
## TOOL ID BY STIMULUS AND SAMPLE
## FACETED BAR CHART 
A <- ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
  geom_bar(position = "stack") +
  # coord_flip() + 
  facet_grid(Distribution ~ ., scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  # scale_y_continuous(labels = scales::percent) + 
  labs( title = "TOOL ID by Stimulus (grouped by Category)",
        subtitle = "", x = "") +
  easy_add_legend_title("TOOL_ID") +
  theme_minimal() 
##############################

## B
## TOOL ID  BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
# dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
B <-ggbarstats( data = df, 
            x = TOOL_ID, y = STIMULUS,
            legend.title = "TOOL ID",
            results.subtitle = FALSE) + 
    scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
    labs( title = "TOOL ID BY STIMULUS ", subtitle = "", x = "",
          caption = "note: different n across stimuli indicate answers were multiple-select")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################

A

B

##note: can't do sankey on tool_id because it is a multi-select field, so we can't track flow for one person through each node [stimulus]

3.3.1.2 Tool ID Confidence (distribution)

Participants were asked: Please rate your confidence in this choice. The response is stored as TOOL_CONF .

## DESCRIBE TOOL CONFIDENCE
df <- df_tools %>% 
  filter(str_detect(STIMULUS,"B2")) %>% 
  select(PID, STIMULUS, TOOL_CONF) %>% 
  summarize( .by=c(PID,STIMULUS),
    TOOL_CONF = mean(TOOL_CONF) #this is a dummy just to collapse b/c the values will all be the same
  )

desc.tool_conf <- psych::describe(df$TOOL_CONF)

## H 
## Distribution of MAKER_CONF 
## GGSTATS HISTOGRAM   
H <- gghistostats(df, x = TOOL_CONF, 
              test.value = 50,
              binwidth = 5,
              centrality.plotting = TRUE,
              centrality.type = "parametric",
              bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
             ) + 
  labs(title = "Distribution of Tool ID Confidence (Block 2)",
       caption = paste0("test value = 50; sd = ", round(desc.tool_conf$sd,0))) + 
  theme_minimal()
##############################

# DISPLAY GRAPHS
H

# CLEANUP 
rm(df)

Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their tool ID identification] ranging from0 to 100 , with a mean of 71.4. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty). Relative to MAKER_ID, MAKER_AGE AND MAKER_GENDER, participants were more confident in their identification of the tools used to create a particular image.

3.3.1.3 Tool ID Confidence (by Stimulus)

#TEMP FILTER DATAFRAME
temp  <- df_tools %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, TOOL_ID, TOOL_CONF) %>% 
  group_by(PID) %>% 
  summarise(
         min = min(TOOL_CONF), 
         max = max(TOOL_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE)
  )

#proportion of low variance participants 
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)

#FILTER DATAFRAME
df <- df_tools %>% 
  filter(str_detect(STIMULUS, "B2")) %>% 
  select(PID, STIMULUS, TOOL_ID, TOOL_CONF) %>% 
  group_by(PID) %>% 
  mutate(
         min = min(TOOL_CONF), 
         max = max(TOOL_CONF),
         range = max-min,
         low_var = ifelse(range < 20, TRUE, FALSE))
  


## I
## MAKER GENDER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT 
I <- ggplot(df, aes(x = STIMULUS, y = TOOL_CONF)) + 
  geom_boxplot(width = 0.3) + 
  stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
  stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE, 
               vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
  geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
  scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
  geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
  labs(
    title = "Confidence in TOOL",
    y = "TOOL CONFIDENCE (%)",
    x = "Stimulus",
    subtitle = paste0("Scores range from ",desc.tool_conf$min, " to ", desc.tool_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
    caption = "lines connect observations from the same participant 
    red indicates participants with less than 20 points range between confidence scores
    (i.e. participants whose confidence is stable, invariant across stimuli)") + 
   theme_minimal() + easy_remove_legend()
##############################



# MAKER_GENDER CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF)) + 
#   geom_boxplot(width = 0.5)+
#   geom_jitter(width = 0.10, alpha = 0.4) +
#   labs (title = "",
#         x = "", y = "MAKER GENDER CONFIDENCE") + 
#   theme_minimal() 

# DISPLAY GRAPHS
I

# CLEANUP
rm(df, temp)

To explore whether variance in confidence in TOOL_ID differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( r* p_lowvar of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale (like the pattern of confidence in MAKER_ID). Across all stimuli, TOOL_ID confidence scores are higher than confidence for the maker identifications. The mean confidence score for each stimulus (blue) are all greater than 50%.

While there are some insubstantial number of participants who have low variance in tool id confidence, the range of confidence scores is smaller than those for other identifications. This makes it difficult to determine if participants have true high confidence in their choices based on each stimulus, or have some stable confidence across all stimuli.

3.3.1.4 Tool ID & Confidence by Stimulus

# FILTER DATAFRAME
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))

######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = (STIMULUS), fill = fct_rev(TOOL_ID))) + 
  geom_bar(position = "stack", width = 0.8) +
  scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
  # facet_grid( df$ENCOUNTER) + 
  labs (x = "") +
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = (STIMULUS), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "STIMULUS") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

# TOOL_CONFIDENCE by CHOICE
C <- ggplot(df, aes( x = (TOOL_ID), y = TOOL_CONF, color = fct_rev(TOOL_ID))) + 
  geom_boxplot(position=position_dodge(0.9), width = 0.6)+
  geom_jitter(position=position_jitterdodge(), alpha = 0.2) + 
  scale_color_paletteer_d("awtools::a_palette", direction = 1) +
  labs (x = "TOOL_ID") + 
  easy_add_legend_title("TOOL ID")+
  theme_minimal() 

(p <- (a / b / C )  + plot_annotation(
  title = 'TOOL ID & CONFIDENCE',
  subtitle = '',
  caption = ''))

###################################################

Inspecting TOOL_ID Confidence by TOOL_ID category, there does not appear to be any systematic relationship between the category of TOOL_ID a participant chooses for a stimulus, and their confidence in that choice (i.e. It is not the case that identifications of TOOL(ID) = DESIGN_ADVANCED , for example, are made with greater or less confidence than identifications of TOOL(ID) = VIZ_BASIC. There are relatively few instances where participants chose the ‘?’ or “unknown” option.

rm(df, A, B, C, S, H, I, MC, p)
rm(desc.tool_conf, p_lowvar)

3.4 BEHAVIOUR QUESTIONS

3.4.1 ENCOUNTER CHOICE

3.4.1.1 Encounter Choice by Stimulus

#FILTER DATASET
df <- df_graphs %>% filter(Assigned.Block == 2)

# ENCOUNTER CHOICE BY STIMULUS AND SAMPLE
ggplot( df, aes( x = (STIMULUS), fill = ENCOUNTER)) +
  geom_bar(position = "stack") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  labs( title = "MAKER ENCOUNTER by Stimulus",
        subtitle = "", x = "") +
  scale_fill_brewer(palette = "Dark2") + 
  coord_flip()+
  # easy_add_legend_title("") +
  theme_minimal()

#ENCOUNTER CHOICE
## HORIZONTAL BAR CHART 
ggplot( df, aes( x = fct_rev(STIMULUS), fill = (ENCOUNTER))) +
  geom_bar(position = "stack") +
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") + 
  labs( title = "ENCOUNTER CHOICE by Stimulus",
        subtitle = "", x = "") +
  scale_fill_brewer(palette = "Dark2") + 
  coord_flip()+
  easy_add_legend_title("") +
  theme_minimal()

#ENCOUNTER CHOICE 
## GGSTATSPLOT
ggbarstats( data = df, x = "ENCOUNTER", y = "STIMULUS",
                 legend.title = "ENCOUNTER")  +
  scale_fill_brewer(palette = "Dark2") + 
      labs( title = "ENCOUNTER BY STIMULUS ", subtitle = "", x = "") 
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

3.4.2 ACTION CHOICE

3.4.2.1 Actions Choice by Stimulus

#FILTER DATASET
df <- df_actions %>% filter( str_detect(STIMULUS, "B2"))

# ACTION CHOICE BY STIMULUS AND SAMPLE
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
  geom_bar(position = "stack") +
  coord_flip() + 
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  # scale_y_continuous(labels = scales::percent) + 

  labs( title = "Chart Action by Stimulus",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 

# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
  geom_bar(position = "stack") +
  coord_flip() + 
  facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") + 
  scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
  # scale_y_continuous(labels = scales::percent) + 

  labs( title = "Chart Action by Stimulus",
        subtitle = "", x = "") +
  easy_add_legend_title("ACTION") +
  theme_minimal() 

#ACTION CHOICE CHOICE BY STIMULUS
## GGSTATSPLOT
ggbarstats( data = df, x = "CHART_ACTION", y = "STIMULUS",
                 legend.title = "CHART ACTION")  +
  scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
      labs( title = "CHART ACTION BY STIMULUS ", subtitle = "", x = "") 
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

3.4.2.2 TODO PICK UP HERE

3.4.2.3 TODO ALSO GROUP VS. INDIVIDUAL DIFFERENCES

ie. mixed model, but also look at stability of individual choice across stimuli maybe treat individual as fixed factor and look at amount of variance explained?

3.4.2.4 TODO CONFIDENCE

revisit ggwithin and grouped within, which require a continuous outcome

# ggwithinstats(
#   data = df %>% filter(STIMULUS %in% c("B2-1","B2-2")),
#   x = STIMULUS,
#   y = MAKER_ID,
#   type = "robust", ## type of statistical test
#   xlab = "MAKER_ID", ## label for the x-axis
#   ylab = "MAKER_CONF", ## label for the y-axis
#   # package = "yarrr", ## package from which color palette is to be taken
#   # palette = "info2", ## choosing a different color palette
#   title = "MAKER ID BY MAKER_CONF"
# ) 

3.5 TODO CHART QUESTIONS

3.5.1 CHART BEAUTY

3.5.1.1 Chart Beauty by Stimulus and Category

df <- df_graphs

qacBase::qstats(df, CHART_BEAUTY, STIMULUS) %>% arrange(mean)
##    STIMULUS   n  mean    sd
## 1      B5-1  53 27.23 22.54
## 2      B4-2  54 28.24 26.07
## 3      B3-1  52 28.56 22.96
## 4      B3-3  52 31.54 27.19
## 5      B2-2  52 31.73 26.11
## 6      B1-3  55 38.13 32.33
## 7      B6-4  52 40.12 29.57
## 8      B1-4  55 45.18 28.97
## 9      B1-2  55 46.09 25.28
## 10     B6-2  52 46.12 27.68
## 11     B5-2  53 49.17 22.59
## 12     B0-0 318 49.67 26.95
## 13     B5-3  53 50.19 27.54
## 14     B4-1  54 53.09 27.92
## 15     B2-1  52 55.13 25.80
## 16     B1-1  55 55.75 29.37
## 17     B6-1  52 56.06 24.03
## 18     B3-2  52 57.25 25.32
## 19     B5-4  53 57.81 26.61
## 20     B2-4  52 61.62 26.60
## 21     B4-4  54 61.70 28.12
## 22     B6-3  52 62.19 27.48
## 23     B3-4  52 63.60 27.31
## 24     B4-3  54 66.56 20.81
## 25     B2-3  52 74.23 24.85
qacBase::qstats(df, CHART_BEAUTY, STIMULUS_CATEGORY) %>% arrange(mean)
##   STIMULUS_CATEGORY   n  mean    sd
## 1                 B 318 43.05 27.26
## 2                 A 318 46.05 28.45
## 3                 F 318 49.67 26.95
## 4                 C 318 53.73 30.80
## 5                 D 318 54.96 29.11
# 
# 
# ggplot(df, aes(x = CHART_BEAUTY)) + 
#   geom_histogram(bins = 20) +
#   facet_grid(BLOCK ~ STIMULUS_CATEGORY, scales = "free", space = "free", drop=TRUE) + 
#   theme_minimal()
# 
# 
# ggplot(df, aes(x = CHART_BEAUTY)) + 
#   geom_histogram(bins = 20) +
#   facet_grid( ~ fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free", drop=TRUE) + 
#   theme_minimal()
# 



##ggstatsplot BY CATEGORY
grouped_gghistostats(
  data = df_graphs %>% filter(STIMULUS != "B0-0"),
  x = CHART_BEAUTY, ## same outcome variable
  grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
  type = "robust", ## robust test: one-sample percentile bootstrap
  test.value = 50, ## test value against which sample mean is to be compared
  centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
  # ggtheme = ggthemes::theme_stata(), ## changing default theme
  ## turn off ggstatsplot theme layer
  ## arguments relevant for combine_plots
  annotation.args = list(
    title = "DISTRIBUTION of Chart Beauty by Cateogry",
    caption = ""
  ),
  plotgrid.args = list(nrow = 6)
)

#B0-0 
gghistostats(
  data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
  x = CHART_BEAUTY, ## numeric variable
  xlab = "CHART BEAUTY", ## x-axis label
  title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
  test.value = 50, ## test value
  caption = ""
)  

3.5.2 CHART TRUST

3.5.2.1 Chart Trust by Stimulus and Category

df <- df_graphs

qacBase::qstats(df, CHART_TRUST, STIMULUS) %>% arrange(mean)
##    STIMULUS   n  mean    sd
## 1      B1-3  55 30.98 27.33
## 2      B3-3  52 39.92 27.16
## 3      B1-4  55 45.45 23.37
## 4      B2-4  52 47.94 26.74
## 5      B5-3  53 49.15 24.83
## 6      B4-2  54 50.50 21.10
## 7      B0-0 318 50.79 20.05
## 8      B6-4  52 52.00 22.13
## 9      B5-4  53 53.68 21.04
## 10     B6-3  52 54.37 21.55
## 11     B2-2  52 54.52 22.15
## 12     B4-4  54 55.61 24.54
## 13     B6-1  52 56.52 19.00
## 14     B1-2  55 56.98 22.19
## 15     B6-2  52 57.50 21.89
## 16     B1-1  55 58.04 22.54
## 17     B3-2  52 58.52 18.56
## 18     B3-4  52 59.00 20.68
## 19     B5-1  53 61.77 23.68
## 20     B5-2  53 62.36 20.57
## 21     B2-1  52 62.44 20.45
## 22     B3-1  52 63.31 16.86
## 23     B2-3  52 66.00 22.10
## 24     B4-1  54 66.24 22.82
## 25     B4-3  54 72.48 19.94
qacBase::qstats(df, CHART_TRUST, STIMULUS_CATEGORY) %>% arrange(mean)
##   STIMULUS_CATEGORY   n  mean    sd
## 1                 F 318 50.79 20.05
## 2                 C 318 52.07 27.81
## 3                 D 318 52.24 23.45
## 4                 B 318 56.71 21.27
## 5                 A 318 61.39 21.15
##ggstatsplot BY CATEGORY
grouped_gghistostats(
  data = df_graphs %>% filter(STIMULUS != "B0-0"),
  x = CHART_TRUST, ## same outcome variable
  grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
  type = "robust", ## robust test: one-sample percentile bootstrap
  test.value = 50, ## test value against which sample mean is to be compared
  centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
  # ggtheme = ggthemes::theme_stata(), ## changing default theme
  ## turn off ggstatsplot theme layer
  ## arguments relevant for combine_plots
  annotation.args = list(
    title = "DISTRIBUTION of Chart TRUST by Cateogry",
    caption = ""
  ),
  plotgrid.args = list(nrow = 6)
)

#B0-0 
gghistostats(
  data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
  x = CHART_TRUST, ## numeric variable
  xlab = "CHART TRUST", ## x-axis label
  title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
  test.value = 50, ## test value
  caption = ""
)  

3.6 SEMANTIC DIFFERENTIALS

4 STASH

wip code stash

4.1 MAKER Donuts

#### DEFINE SET 
stimulus  = "B2-1"
df <- df_graphs %>% filter(STIMULUS == stimulus)

#### GENERATE GRAPHS

  #MAKER_ID-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "reds",
         main = paste0(stimulus, " MAKER ID")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  # #MAKER_GENDER-DONUT
  # PieChart(MAKER_GENDER, data = df,
  #        fill = "blues",
  #        main = paste0(stimulus, " MAKER GENDER")) + theme_minimal()
  # 
  # 
  # #MAKER_AGE-DONUT
  # PieChart(MAKER_AGE, data = df,
  #        fill = "olives",
  #        main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
  
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "rusts",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "olives",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "greens",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "emeralds",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "turquoises",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "aquas",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-MAKER_ID
  PieChart(MAKER_ID, data = df,
         fill = "purples",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "magentas",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "violets",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
  #MAKER_AGE-DONUT
  PieChart(MAKER_ID, data = df,
         fill = "grays",
         main = paste0(stimulus, " MAKER AGE")) + theme_minimal()

## >>> suggestions
## PieChart(MAKER_ID, hole=0)  # traditional pie chart
## PieChart(MAKER_ID, values="%")  # display %'s on the chart
## PieChart(MAKER_ID)  # bar chart
## Plot(MAKER_ID)  # bubble plot
## Plot(MAKER_ID, values="count")  # lollipop plot 
## 
## --- MAKER_ID --- 
## 
##     MAKER_ID Count   Prop 
## ------------------------- 
##   individual    0   0.000 
## organization    1   0.019 
##         news   11   0.212 
##    education   19   0.365 
##    political    4   0.077 
##     business   17   0.327 
## ------------------------- 
##        Total   52   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 38.923, df = 5, p-value = 0.000
## NULL
# "reds"    h   0
# "rusts"   h   30
# "browns"  h   60
# "olives"  h   90
# "greens"  h   120
# "emeralds"    h   150
# "turquoises"  h   180
# "aquas"   h   210
# "blues"   h   240
# "purples" h   270
# "violets" h   300
# "magentas"    h   330
# "grays"

4.2 Alluvial Plots

## EXAMPLE ALLUVIAL PLOT USING GGALUVIAL  (instead of GGSANKEY)
# https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html

# #FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
# ds <- df_graphs %>% 
#   filter(str_detect(STIMULUS, "B2")) %>% 
#   select(STIMULUS, MAKER_ID, PID) %>% 
#   mutate(
#     MAKER_ID = fct_relevel(MAKER_ID, 
#               c("business","education","individual", "news","organization", "political" ))
#   )
# 
# ds %>% 
#   ggplot(aes( x = STIMULUS,
#               stratum = MAKER_ID,
#               label = MAKER_ID,
#               alluvium = PID)) +
#       stat_alluvium(aes(fill = MAKER_ID),
#                     width = 0,
#                     alpha = 1,
#                     geom = "flow")+
#       geom_stratum(width = 0.2, aes(fill= MAKER_ID))+
#       # geom_text(stat = "stratum", size = 5, angle = 90)+
#       scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE,
#                      alpha = 1) +
#       theme_minimal()